home *** CD-ROM | disk | FTP | other *** search
/ Apple WWDC 1996 / WWDC96_1996 (CD).toast / Technology Materials / MacOS 8 Resources / Developer Tools / Mac OS 8 Interfaces & Libraries / Interfaces / PInterfaces / TextParser.p < prev    next >
Text File  |  1996-05-01  |  12KB  |  241 lines

  1. {
  2.      File:        TextParser.p
  3.  
  4.      Contains:    International Text Parser Interfaces.
  5.  
  6.      Version:    Technology:    System 8
  7.                  Release:    Universal Interfaces 3.0d3 on Copland DR1
  8.  
  9.      Copyright:    © 1984-1996 by Apple Computer, Inc.  All rights reserved.
  10.  
  11.      Bugs?:        If you find a problem with this file, send the file and version
  12.                  information (from above) and the problem description to:
  13.  
  14.                      Internet:    apple.bugs@applelink.apple.com
  15.                      AppleLink:    APPLE.BUGS
  16.  
  17. }
  18. {$IFC UNDEFINED UsingIncludes}
  19. {$SETC UsingIncludes := 0}
  20. {$ENDC}
  21.  
  22. {$IFC NOT UsingIncludes}
  23.  UNIT TextParser;
  24.  INTERFACE
  25. {$ENDC}
  26.  
  27. {$IFC UNDEFINED __TEXTPARSER__}
  28. {$SETC __TEXTPARSER__ := 1}
  29.  
  30. {$I+}
  31. {$SETC TextParserIncludes := UsingIncludes}
  32. {$SETC UsingIncludes := 1}
  33.  
  34. {$IFC UNDEFINED __CONDITIONALMACROS__}
  35. {$I ConditionalMacros.p}
  36. {$ENDC}
  37. {$IFC FOR_SYSTEM8_PREEMPTIVE }
  38. {$IFC UNDEFINED __TYPES__}
  39. {$I Types.p}
  40. {$ENDC}
  41. {$IFC UNDEFINED __TEXTOBJECTS__}
  42. {$I TextObjects.p}
  43. {$ENDC}
  44. {$IFC UNDEFINED __TEXTCOMMON__}
  45. {$I TextCommon.p}
  46. {$ENDC}
  47. {$IFC UNDEFINED __LOCALEOBJECTS__}
  48. {$I LocaleObjects.p}
  49. {$ENDC}
  50. {$ENDC}
  51.  
  52. {$PUSH}
  53. {$ALIGN MAC68K}
  54. {$LibExport+}
  55.  
  56. {$IFC FOR_SYSTEM8_PREEMPTIVE }
  57.  
  58. TYPE
  59.     TTextParseRef = ^LONGINT;                                    { iterator reference: produced by CreateLexicalIterator, taken by the iterator routines }
  60.     ParsingToken                        = UInt32;                { base token type; every token kind below is an alias of it }
  61.     BasicToken                            = ParsingToken;
  62.     RangeToken                            = ParsingToken;
  63.     KeywordToken                        = ParsingToken;
  64.     MultiChoiceToken                    = ParsingToken;
  65.     ComposedToken                        = ParsingToken;
  66. { definitions for the future (no routine declared in this file takes these two types) }
  67.     RegExpToken                            = ParsingToken;
  68.     GrammarRuleToken                    = ParsingToken;
  69. { definition of pointers to tokens; every pointer type is an alias of ParsingTokenPtr }
  70.     ParsingTokenPtr                        = ^ParsingToken;
  71.     BasicTokenPtr                        = ParsingTokenPtr;
  72.     RangeTokenPtr                        = ParsingTokenPtr;
  73.     KeywordTokenPtr                        = ParsingTokenPtr;
  74.     MultiChoiceTokenPtr                    = ParsingTokenPtr;
  75.     ComposedTokenPtr                    = ParsingTokenPtr;
  76.     ParsingOptionBits                    = OptionBits;            { parsing options; see the k...OptionBit / k...OptionMask constants }
  77.     ParserLocaleObjectRef                = LocaleObjectRef;        { parser object type used by the Get...ParserObject... routines }
  78.  
  79. CONST
  80.                                                                 { ParsingOptionBits: bit numbers }
  81.     kParseMainTokenOptionBit    = 0;
  82.     kIsAlphaNumericKeyWordOptionBit = 1;
  83.     kIsCaseSensitiveParsingOptionBit = 2;
  84.     kIsDiacreticSensitiveParsingOptionBit = 3;                    { sic: 'Diacretic' (diacritic); identifier spelling is part of the interface }
  85.     kParseTextEnclosedByLitearlDelimitersOptionBit = 4;            { sic: 'Litearl' (literal); identifier spelling is part of the interface }
  86.     kIsCommentsAllowedOptionBit    = 5;
  87.  
  88.                                                                 { ParsingOptionMask: one mask per bit number, value = 2 to the power of the bit number }
  89.     kStandardParsingOptionsMask    = 0;                            { no option bit set }
  90.     kParseMainTokenOptionMask    = $00000001;                    { bit 0 }
  91.     kIsAlphaNumericKeyWordOptionMask = $00000002;                { bit 1 }
  92.     kIsCaseSensitiveParsingOptionMask = $00000004;                { bit 2 }
  93.     kIsDiacreticSensitiveParsingOptionMask = $00000008;            { bit 3; sic: 'Diacretic' }
  94.     kParseTextEnclosedByLitearlDelimitersOptionMask = $00000010;    { bit 4; sic: 'Litearl' }
  95.     kIsCommentsAllowedOptionMask = $00000020;                    { bit 5 }
  96.  
  97. {  Basic Tokens definitions (try to match system 7 tokens when possible) }
  98.     kBasicTokenUnknown            = 0;                            { characters that don't have token definition }
  99.     kBasicTokenEscape            = 10;                            { character escape (e.g. '\' in "\n", "\t") }
  100.     kBasicTokenLeftParen        = 16;                            { open parenthesis }
  101.     kBasicTokenRightParen        = 17;                            { close parenthesis }
  102.     kBasicTokenLeftBracket        = 18;                            { open square bracket }
  103.     kBasicTokenRightBracket        = 19;                            { close square bracket }
  104.     kBasicTokenLeftCurly        = 20;                            { open curly bracket }
  105.     kBasicTokenRightCurly        = 21;                            { close curly bracket }
  106.     kBasicTokentokenLeftEnclose    = 22;                            { open guillemet; sic: doubled 'token' in the name, which is part of the interface }
  107.     kBasicTokenRightEnclose        = 23;                            { close guillemet }
  108.     kBasicTokenPlus                = 24;
  109.     kBasicTokenMinus            = 25;
  110.     kBasicTokenAsterisk            = 26;                            { times/multiply }
  111.     kBasicTokenDivide            = 27;
  112.     kBasicTokenPlusMinus        = 28;                            { plus or minus symbol }
  113.     kBasicTokenSlash            = 29;
  114.     kBasicTokenBackSlash        = 30;
  115.     kBasicTokenLess                = 31;                            { less than symbol }
  116.     kBasicTokenGreat            = 32;                            { greater than symbol }
  117.     kBasicTokenEqual            = 33;
  118.     kBasicTokenLessEqual1        = 35;                            { less than or equal, 1 character }
  119.     kBasicTokenGreatEqual1        = 37;                            { greater than or equal, 1 character }
  120.     kBasicTokenNotEqual            = 40;                            { not equal, 1 character }
  121.     kBasicTokenExclam            = 43;                            { exclamation point }
  122.     kBasicTokenTilde            = 44;                            { centered tilde }
  123.     kBasicTokenComma            = 45;
  124.     kBasicTokenPeriod            = 46;
  125.     kBasicTokenLeft2Quote        = 47;                            { open double quote }
  126.     kBasicTokenRight2Quote        = 48;                            { close double quote }
  127.     kBasicTokenLeft1Quote        = 49;                            { open single quote }
  128.     kBasicTokenRight1Quote        = 50;                            { close single quote }
  129.     kBasicToken2Quote            = 51;                            { double quote }
  130.     kBasicToken1Quote            = 52;                            { single quote }
  131.     kBasicTokenSemicolon        = 53;
  132.     kBasicTokenPercent            = 54;
  133.     kBasicTokenCaret            = 55;
  134.     kBasicTokenUnderline        = 56;
  135.     kBasicTokenAmpersand        = 57;
  136.     kBasicTokenAtSign            = 58;
  137.     kBasicTokenBar                = 59;                            { vertical bar }
  138.     kBasicTokenQuestion            = 60;
  139.     kBasicTokenPi                = 61;                            { lower-case pi }
  140.     kBasicTokenRoot                = 62;                            { square root symbol }
  141.     kBasicTokenSigma            = 63;                            { capital sigma }
  142.     kBasicTokenIntegral            = 64;                            { integral sign }
  143.     kBasicTokenMicro            = 65;
  144.     kBasicTokenCapPi            = 66;                            { capital pi }
  145.     kBasicTokenInfinity            = 67;
  146.     kBasicTokenColon            = 68;
  147.     kBasicTokenHash                = 69;                            { e.g. # }
  148.     kBasicTokenDollar            = 70;
  149.     kBasicTokenNoBreakSpace        = 71;                            { non-breaking space }
  150.     kBasicTokenFraction            = 72;
  151.     kBasicTokenIntlCurrency        = 73;
  152.     kBasicTokenLeftSingGuillemet = 74;
  153.     kBasicTokenRightSingGuillemet = 75;
  154.     kBasicTokenPerThousand        = 76;
  155.     kBasicTokenEllipsis            = 77;
  156.     kBasicTokenCenterDot        = 78;
  157.     kBasicTokenSpace            = 128;
  158.     kBasicTokenPasswordChar        = 129;
  159.  
  160.                                                                 { predefined composed tokens (tokens made of more than one character) }
  161.     kComposedTokenLessEqual2    = 16383;                        { less than or equal, 2 characters (e.g. <=); NOTE(review): value is one below the consecutive run that starts at 16384 - confirm it is intended }
  162.     kComposedTokenEqual2        = 16384;                        { greater than or equal, 2 characters (e.g. >=); sic: the name lacks 'Great' although it denotes >= }
  163.     kComposedToken2Equal        = 16385;                        { double equal (e.g. ==) }
  164.     kComposedTokenColonEqual    = 16386;                        { colon equal (e.g. :=) }
  165.     kComposedTokenLessGreat        = 16387;                        { less/greater, Pascal not equal (e.g. <>) }
  166.     kComposedTokenExclamEqual    = 16388;                        { exclamation equal, C not equal (e.g. !=) }
  167.     kComposedTokenNewLine        = 16389;                        { new line; may be composed of more than one character (10,13) }
  168.  
  169.                                                                 { predefined multiple choice tokens (each stands for a set of alternatives) }
  170.     kChoiceTokenWhite            = 8192;                            { white space (CR, LF, space, non-breaking space) }
  171.     kChoiceTokenLeftLit            = 8193;                            { literal begin, can be one or more }
  172.     kChoiceTokenRightLit        = 8194;                            { literal end, can be one or more }
  173.     kChoiceTokenAlpha            = 8195;                            { alphabetic char }
  174.     kChoiceTokenNumeric            = 8196;                            { numeric char }
  175.     kChoiceTokenPunctuationChar    = 8197;                            { punctuation char }
  176.     kChoiceTokenRightComment    = 8198;                            { presumably comment end (by analogy with kChoiceTokenRightLit), can be one or more; note Right is numbered before Left here }
  177.     kChoiceTokenLeftComment        = 8199;                            { presumably comment begin (by analogy with kChoiceTokenLeftLit), can be one or more }
  178.  
  179.                                                                 { predefined token classes (not overridable for 8.0) mapped to regular expression tokens }
  180.     kTokenClassIdentifier        = 32768;                        { alphabetic or alphanumeric entities }
  181.     kTokenClassComment            = 32769;                        { anything inside comment delimiters }
  182.     kTokenClassliteral            = 32770;                        { anything inside literal delimiters; name has a lowercase 'l', unlike its siblings (harmless: Pascal identifiers are case-insensitive) }
  183.     kTokenClassIntegerNumber    = 32771;                        { text is a numeric presentation (integer) }
  184.     kTokenClassFloatingNumber    = 32772;                        { text is a numeric presentation (float) }
  185.  
  186.  
  187. TYPE
  188.     CharacterRangePtr = ^CharacterRange;
  189.     CharacterRange = RECORD
  190.                                                                         { structure defining a range of characters; no routine declared in this file takes it (the range routines pass two UInt32 values instead) }
  191.         theCharRangeBegin:        UInt32;                                { character that begins the range }
  192.         theCharRangeEnd:        UInt32;                                { character that ends the range; whether the end is inclusive is not stated here - TODO confirm }
  193.     END;
  194.  
  195. { ============================================================ Locale object parser APIs: each returns an OSStatus and uses the C calling convention ==================================================== }
  196. FUNCTION GetParserObjectByLocaleID(theLocaleIdentifier: LocaleIdentifier; theEncoding: TextEncoding; VAR theParserObject: ParserLocaleObjectRef): OSStatus; C; { out: theParserObject, selected by locale identifier and text encoding }
  197. FUNCTION GetParserObjectByLocaleRef(locale: LocaleRef; theEncoding: TextEncoding; VAR theParserObject: ParserLocaleObjectRef): OSStatus; C; { out: theParserObject, selected by LocaleRef and text encoding }
  198. FUNCTION GetCurrentParserObject(VAR theParserObject: ParserLocaleObjectRef): OSStatus; C; { out: theParserObject; takes no locale or encoding argument }
  199. FUNCTION GetParserObjectInfo(theParserObject: ParserLocaleObjectRef; VAR theEncoding: TextEncoding; VAR theLocaleIdentifier: LocaleIdentifier): OSStatus; C; { out: theEncoding and theLocaleIdentifier for theParserObject }
  200. { ====================================================== Iterator creation/modification ========================================================== }
  201. FUNCTION CreateLexicalIterator(localeParserRef: ParserLocaleObjectRef; theTextObject: TextObject; startIndex: TextObjectIndex; endIndex: TextObjectIndex; theParsingOptions: ParsingOptionBits; VAR theParserRef: TTextParseRef): OSStatus; C; { out: theParserRef; inputs are a parser object, a text object with start/end indexes, and parsing option bits }
  202. FUNCTION LexicalIteratorGetNextToken(theParser: TTextParseRef; VAR theCurrentToken: ParsingToken; VAR startIndex: TextObjectIndex; VAR endIndex: TextObjectIndex): OSStatus; C; { out: theCurrentToken, startIndex, endIndex (presumably the extent of the token in the text - confirm) }
  203. FUNCTION LexicalIteratorSetCommentDelimiters(refLexicalIterator: TTextParseRef; tokenLeftDel: ParsingToken; tokenRightDel: ParsingToken): OSStatus; C; { tokenLeftDel / tokenRightDel: left and right comment delimiter tokens }
  204. FUNCTION LexicalIteratorReset(refLexicalIterator: TTextParseRef): OSStatus; C; { takes only the iterator reference }
  205. FUNCTION LexicalIteratorSetText(refLexicalIterator: TTextParseRef; theTextObject: TextObject; startIndex: TextObjectIndex; endIndex: TextObjectIndex): OSStatus; C; { same text object and start/end index parameters as CreateLexicalIterator }
  206. FUNCTION LexicalIteratorSetTokenToMatch(refLexicalIterator: TTextParseRef; theToken: ComposedToken): OSStatus; C; { theToken is declared as ComposedToken, which is an alias of ParsingToken }
  207. { ====================================================== Tokens Get/Set APIs: 'Locale' variants take a ParserLocaleObjectRef, the others take an iterator reference ========================================================== }
  208. FUNCTION LexicalIteratorDefineBasicToken(refLexicalIterator: TTextParseRef; theChar: UInt32; theToken: BasicToken): OSStatus; C; { in: one character code and the token to associate with it }
  209. FUNCTION GetUniqueBasicToken(refLexicalIterator: TTextParseRef; VAR theToken: BasicToken): OSStatus; C; { out: theToken }
  210. FUNCTION LexicalIteratorDefineKeywordToken(refLexicalIterator: TTextParseRef; theText: TextObject; theStart: TextObjectIndex; theEnd: TextObjectIndex; theToken: KeywordToken): OSStatus; C; { in: keyword text given as a text object with start/end indexes, and the token for it }
  211. FUNCTION GetUniqueKeywordToken(refLexicalIterator: TTextParseRef; VAR theToken: KeywordToken): OSStatus; C; { out: theToken }
  212. FUNCTION LexicalIteratorDefineRangeToken(refLexicalIterator: TTextParseRef; rangeBeginChar: UInt32; rangeEndChar: UInt32; theToken: RangeToken): OSStatus; C; { in: range given as two character codes, and the token for it }
  213. FUNCTION GetUniqueRangeToken(refLexicalIterator: TTextParseRef; VAR theToken: RangeToken): OSStatus; C; { out: theToken }
  214. FUNCTION LexicalIteratorDefineMultiChoiceToken(refLexicalIterator: TTextParseRef; nChoices: ItemCount; VAR theChoices: ParsingToken; VAR theToken: MultiChoiceToken): OSStatus; C; { NOTE(review): theChoices is a single VAR ParsingToken, presumably the first of nChoices tokens; theToken is VAR here but by value in the Basic/Keyword/Range Define routines - confirm }
  215. FUNCTION GetUniqueMultiChoiceToken(refLexicalIterator: TTextParseRef; VAR theToken: MultiChoiceToken): OSStatus; C; { out: theToken }
  216. FUNCTION LexicalIteratorDefineComposedToken(refLexicalIterator: TTextParseRef; nChoices: ItemCount; VAR theChoices: ParsingToken; VAR theToken: ComposedToken): OSStatus; C; { NOTE(review): same parameter shape as LexicalIteratorDefineMultiChoiceToken; theChoices presumably the first of nChoices tokens - confirm }
  217. FUNCTION GetUniqueComposedToken(refLexicalIterator: TTextParseRef; VAR theToken: ComposedToken): OSStatus; C; { out: theToken }
  218. FUNCTION GetTextFromToken(refLexicalIterator: TTextParseRef; theToken: ParsingToken; TheText: TextObject): OSStatus; C; { NOTE(review): TheText is passed by value; presumably TextObject is a reference that receives the text - confirm }
  219. FUNCTION GetLocaleTextFromToken(localeParserRef: ParserLocaleObjectRef; theToken: ParsingToken; TheText: TextObject): OSStatus; C; { parser-object variant of GetTextFromToken; same by-value TheText }
  220. FUNCTION GetRangeToken(refLexicalIterator: TTextParseRef; theToken: RangeToken; VAR beginRangeChar: UInt32; VAR endRangeChar: UInt32): OSStatus; C; { out: beginRangeChar and endRangeChar for theToken }
  221. FUNCTION GetLocaleRangeToken(localeParserRef: ParserLocaleObjectRef; theToken: RangeToken; VAR beginRangeChar: UInt32; VAR endRangeChar: UInt32): OSStatus; C; { parser-object variant of GetRangeToken }
  222. FUNCTION GetCountTokenChoices(refLexicalIterator: TTextParseRef; theToken: MultiChoiceToken; VAR nChoices: ItemCount): OSStatus; C; { out: nChoices for theToken }
  223. FUNCTION GetLocaleCountTokenChoices(localeParserRef: ParserLocaleObjectRef; theToken: MultiChoiceToken; VAR nChoices: ItemCount): OSStatus; C; { parser-object variant of GetCountTokenChoices }
  224. FUNCTION GetMultiChoiceToken(refLexicalIterator: TTextParseRef; theToken: MultiChoiceToken; VAR theChoices: ParsingToken): OSStatus; C; { out: theChoices; no count parameter, so presumably sized from GetCountTokenChoices - confirm }
  225. FUNCTION GetLocaleMultiChoiceToken(localeParserRef: ParserLocaleObjectRef; theToken: MultiChoiceToken; VAR theChoices: ParsingToken): OSStatus; C; { parser-object variant of GetMultiChoiceToken }
  226. FUNCTION GetCountTokenComposition(refLexicalIterator: TTextParseRef; theToken: ComposedToken; VAR nChoices: ItemCount): OSStatus; C; { out: nChoices for theToken }
  227. FUNCTION GetLocaleCountTokenComposition(localeParserRef: ParserLocaleObjectRef; theToken: ComposedToken; VAR nChoices: ItemCount): OSStatus; C; { parser-object variant of GetCountTokenComposition }
  228. FUNCTION GetComposedToken(refLexicalIterator: TTextParseRef; theToken: ComposedToken; VAR theComposition: ParsingToken): OSStatus; C; { out: theComposition; no count parameter, so presumably sized from GetCountTokenComposition - confirm }
  229. FUNCTION GetLocaleComposedToken(localeParserRef: ParserLocaleObjectRef; theToken: ComposedToken; VAR theChoices: ParsingToken): OSStatus; C; { parser-object variant of GetComposedToken; note the output is named theChoices here but theComposition there }
  230. {$ENDC}
  231. {$ALIGN RESET}
  232. {$POP}
  233.  
  234. {$SETC UsingIncludes := TextParserIncludes}
  235.  
  236. {$ENDC} {__TEXTPARSER__}
  237.  
  238. {$IFC NOT UsingIncludes}
  239.  END.
  240. {$ENDC}
  241.